{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import mne\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy import interpolate\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Dataset root directory; the paths below are built relative to it\n",
    "root = 'ADFD/'\n",
    "\n",
    "# Read the tab-separated participants table and display it as the cell output\n",
    "participants_path = os.path.join(root, 'participants.tsv')\n",
    "participants = pd.read_csv(filepath_or_buffer=participants_path, sep='\\t')\n",
    "participants"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Uninitialised int32 buffer with one row per participant and two columns;\n",
    "# np.empty does not zero the memory, so every entry must be written before use\n",
    "n_participants = participants.shape[0]\n",
    "labels = np.empty((n_participants, 2), dtype=np.int32)\n",
    "labels.shape"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Group code (4th column of the participants table) -> integer class label\n",
    "label_map = {'A':2, 'F':1, 'C':0}\n",
    "\n",
    "for row_idx, row in enumerate(participants.values):\n",
    "    # the numeric participant id is the last three characters of the first column\n",
    "    pid = int(row[0][-3:])\n",
    "    label = label_map[row[3]]\n",
    "    # column 0 of `labels` holds the class label, column 1 the participant id\n",
    "    labels[row_idx] = (label, pid)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Create the output directory on first run, then write the label array into it\n",
    "label_path = 'Label'\n",
    "if not os.path.exists(label_path):\n",
    "    os.makedirs(label_path)\n",
    "\n",
    "label_file = '{}/label.npy'.format(label_path)\n",
    "np.save(file=label_file, arr=labels)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Read the label file back as a sanity check.\n",
    "# The path is derived from `label_path` (the directory the labels were saved to)\n",
    "# instead of a second hard-coded literal, so the check cannot silently read a\n",
    "# stale file if the output directory is ever changed.\n",
    "np.load(label_path + '/label.npy')"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Directory under the dataset root that is scanned for per-subject 'sub-*' folders;\n",
    "# shown as the cell output so the resolved path can be checked by eye\n",
    "derivatives_root = os.path.join(root, 'derivatives/')\n",
    "derivatives_root"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Sanity scan: collect bad channels, sampling frequency and data shape of every recording\n",
    "bad_channel_list, sampling_freq_list, data_shape_list = [], [], []\n",
    "\n",
    "for sub in os.listdir(derivatives_root):\n",
    "    # only subject folders are of interest\n",
    "    if 'sub-' not in sub:\n",
    "        continue\n",
    "    sub_path = os.path.join(derivatives_root, sub, 'eeg/')\n",
    "    for file in os.listdir(sub_path):\n",
    "        # only EEGLAB .set recordings are read\n",
    "        if '.set' not in file:\n",
    "            continue\n",
    "        raw = mne.io.read_raw_eeglab(os.path.join(sub_path, file), preload=False)\n",
    "        # channels marked as bad in the recording header\n",
    "        bad_channel_list.append(raw.info['bads'])\n",
    "        # sampling frequency in Hz\n",
    "        sampling_freq_list.append(raw.info['sfreq'])\n",
    "        # shape of the signal array returned by MNE\n",
    "        data_shape_list.append(raw.get_data().shape)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Print the three summaries collected by the scan, in this order\n",
    "# (the remarks are the observations recorded for this dataset):\n",
    "#   1. bad channels per recording  -> 0 bad channels\n",
    "#   2. sampling frequency          -> 500 Hz for all runs\n",
    "#   3. data shape per recording    -> same number of channels & different timestamps\n",
    "for summary in (bad_channel_list, sampling_freq_list, data_shape_list):\n",
    "    print(summary)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# EEG data preprocessing\n",
    "\n",
    "def resampling(array, freq=500, kind='linear', target_freq=256):\n",
    "    \"\"\"Resample a 1-D signal from `freq` Hz to `target_freq` Hz by interpolation.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    array : 1-D array-like\n",
    "        Samples of a single channel.\n",
    "    freq : float\n",
    "        Sampling frequency of `array` in Hz.\n",
    "    kind : str\n",
    "        Interpolation kind forwarded to scipy.interpolate.interp1d.\n",
    "    target_freq : float\n",
    "        Sampling frequency of the returned signal in Hz (256 by default,\n",
    "        which was previously hard-coded).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        int(len(array) / freq * target_freq) samples covering the same span\n",
    "        as the input (first and last input samples are preserved).\n",
    "\n",
    "    Notes\n",
    "    -----\n",
    "    The signal is only interpolated; no low-pass filter is applied first.\n",
    "    \"\"\"\n",
    "    n_in = len(array)\n",
    "    n_out = int(n_in / freq * target_freq)\n",
    "    # both grids span [1, n_in]; only the number of points differs\n",
    "    t = np.linspace(1, n_in, n_in)\n",
    "    t_new = np.linspace(1, n_in, n_out)\n",
    "    return interpolate.interp1d(t, array, kind=kind)(t_new)\n",
    "\n",
    "\n",
    "def segment(df, window_size=2560):\n",
    "    \"\"\"Cut `df` into consecutive, non-overlapping windows of `window_size` rows.\n",
    "\n",
    "    The windows are laid out so that one window boundary falls on the middle\n",
    "    row (df.shape[0] // 2). Leading and trailing rows that do not fill a\n",
    "    complete window are discarded.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Rows are cut with `.iloc`; all columns are kept.\n",
    "    window_size : int\n",
    "        Number of rows per window.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of pandas.DataFrame\n",
    "        The windows in row order; empty if no complete window fits.\n",
    "    \"\"\"\n",
    "    n_rows = df.shape[0]\n",
    "    start = n_rows // 2\n",
    "    # whole windows that fit before / after the middle row\n",
    "    left_index = start % window_size\n",
    "    right_index = start + ((n_rows - start) // window_size) * window_size\n",
    "    return [df.iloc[i: i + window_size, :]\n",
    "            for i in range(left_index, right_index, window_size)]\n",
    "\n",
    "\n",
    "def eeg_data(eeg_path, window_size=256, target_freq=256):\n",
    "    \"\"\"Load one EEGLAB .set recording, resample every channel and segment it.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    eeg_path : str\n",
    "        Path of the .set file.\n",
    "    window_size : int\n",
    "        Rows (resampled time steps) per segment; 256 by default.\n",
    "    target_freq : float\n",
    "        Sampling frequency to resample to, in Hz; 256 by default.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of pandas.DataFrame\n",
    "        Segments produced by `segment`; each has `window_size` rows and one\n",
    "        column per row of `raw.get_data()`.\n",
    "    \"\"\"\n",
    "    raw = mne.io.read_raw_eeglab(eeg_path, preload=False)\n",
    "    signals = raw.get_data()\n",
    "    # use the rate stored in the recording instead of assuming 500 Hz\n",
    "    sfreq = raw.info['sfreq']\n",
    "    trial = [resampling(channel, freq=sfreq, kind='linear', target_freq=target_freq)\n",
    "             for channel in signals]\n",
    "    # one row per time step, one column per channel\n",
    "    df = pd.DataFrame(trial).T\n",
    "    return segment(df, window_size=window_size)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Export one feature file per subject: all segments of all of its recordings\n",
    "feature_path = 'Feature'\n",
    "if not os.path.exists(feature_path):\n",
    "    os.makedirs(feature_path)\n",
    "\n",
    "# os.listdir returns entries in arbitrary order, so the folders are sorted to make\n",
    "# the running sub_id reproducible and to follow the subject folder names.\n",
    "# NOTE(review): sub_id is a counter, not parsed from the folder name; it only\n",
    "# matches the numeric participant id if there are no gaps in the subject folders.\n",
    "sub_id = 1\n",
    "for sub in sorted(os.listdir(derivatives_root)):\n",
    "    if 'sub-' not in sub:\n",
    "        continue\n",
    "    sub_path = os.path.join(derivatives_root, sub, 'eeg/')\n",
    "    li_sub = []\n",
    "    for file in sorted(os.listdir(sub_path)):\n",
    "        if '.set' in file:\n",
    "            file_path = os.path.join(sub_path, file)\n",
    "            res_df = eeg_data(file_path)\n",
    "            li_sub.extend(df_std.values for df_std in res_df)\n",
    "    # stack the segments of this subject into a single array\n",
    "    array_sub = np.array(li_sub)\n",
    "    # one summary line per subject instead of dumping every segment\n",
    "    print('{}: {}'.format(sub, array_sub.shape))\n",
    "    np.save(feature_path + '/feature_{:02d}.npy'.format(sub_id), array_sub)\n",
    "    sub_id += 1"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Check the saved feature files: print the array shape stored in each one\n",
    "\n",
    "path = 'Feature/'\n",
    "\n",
    "# sorted for a stable, readable listing\n",
    "for file in sorted(os.listdir(path)):\n",
    "    # skip anything that is not a saved array (np.load would fail on it)\n",
    "    if not file.endswith('.npy'):\n",
    "        continue\n",
    "    sub_path = os.path.join(path, file)\n",
    "    print('{}: {}'.format(file, np.load(sub_path).shape))"
   ],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
